%include "Q:\PU2\Macros\hashmerge.sas";

libname input "Q:\External Research Data\AutoPay";
libname output "Q:\PU2\New folder";
libname temp "Q:\PU2\temp";
libname wr "Q:\PU2\wage_rigidity";
options threads cpucount=actual;

*Balanced panel of firms: keep firms present in every month of the window;
	/* Pass 1: count rows per (state_bin, client_code) over the two-month window
	   that starts 01DEC2016 (month offsets 0 and 1) and keep only the cells
	   that have exactly two rows. */
	proc summary data=output.firm_list nway;
		where intck("month",'01dec2016'd,yr_month) in (0,1);
		class state_bin client_code;
		output out=balanced_firms
			(drop=_type_ rename=(_freq_=n) where=(n=2));
	run;

	/* Pass 2: collapse to one row per client_code. The data set is
	   overwritten in place and keeps only the client_code column. */
	proc summary data=balanced_firms nway;
		class client_code;
		output out=balanced_firms(drop=_type_ _freq_);
	run;

/*
 * norm_by_weeks: DATA-step fragment used by clean2.
 *   in=  monthly total to normalize
 *   out= variable that receives the per-week value
 * Divides by the actual number of payrolls (payroll_q) and rescales by pay
 * frequency. When payroll_q is 0 the output is left missing; this is the
 * same value a division by zero would produce, but without the log errors.
 * Defined at file level so it is compiled once rather than on every call
 * of clean2.
 */
%macro norm_by_weeks(in=,out=);
	if payroll_q ne 0 then do;
		if pay_freq in ('2','B') then &out.=&in./(payroll_q*2);
		else if pay_freq in ('4','5','W') then &out.=&in./(payroll_q);
		else if pay_freq='M' then &out.=(&in./payroll_q)*3/13;
		else if pay_freq='S' then &out.=(&in./payroll_q)*6/13;
	end;
%mend norm_by_weeks;

/*
 * clean2: clean one month of post-2016 data.
 *   y= two-digit year suffix, m= two-digit month; reads input.ap_20<y><m>.
 * Produces:
 *   workers<y><m>           cleaned rows with salaried non-missing and
 *                           state_bin2=1 (bin built from worked_in_st_c)
 *   balanced_workers<y><m>  the same rows restricted to balanced_firms,
 *                           de-duplicated on client_code emp_pur_c and merged
 *                           with output.client_crosswalk by client_code
 * Requires the external hashmerge macro, WORK.balanced_firms and
 * output.client_crosswalk (assumed sorted by client_code - TODO confirm).
 */
%macro clean2(y=,m=);
	data workers&y.&m.(where=(salaried~=. and 
			(state_bin2=1))
			keep=yr_month state_emp state_bin state_bin2 client_code salaried pay_freq 
			base_pay gross_ern_a ot_a reg_a emp_pur_c tot_hrs_q hrs2_q
			payroll_q imputed_pays emp_hir_dat
			gross gross2 ot reg tot_hrs ot_hrs gender naics rename=(state_emp=state));
		set input.ap_20&y&m(where=(emp_stat_c~="T") 
			rename=(worked_in_st_c=state_emp addr_st_c=state std_rt_type_c=type std_pay_rt_a=std_pay 
				pay_freq_c=pay_freq gen_c=gender naics_code1=naics)
			keep=emp_pur_c yr_month addr_st_c emp_stat_c client_code std_rt_type_c std_pay_rt_a 
			pay_freq_c gross_ern_a ern2_a ern1_a hrs2_q tot_hrs_q
			last_prc_pay_d payroll_q gen_c naics_code1 worked_in_st_c emp_hir_dat);

		/* State bin from the address state (addr_st_c): 1 = every other state */
			if state="AK" then state_bin=2;
			else if state="CA" then state_bin=3;
			else if state="NY" then state_bin=4;
			else if state="ME" then state_bin=5;
			else state_bin=1;
			length state_bin 3;

		/* Same bins from the state worked in (worked_in_st_c) */
			if state_emp="AK" then state_bin2=2;
			else if state_emp="CA" then state_bin2=3;
			else if state_emp="NY" then state_bin2=4;
			else if state_emp="ME" then state_bin2=5;
			else state_bin2=1;
			length state_bin2 3;

		/* Censor gross pay at 20000 */
			if gross_ern_a~=. then gross_ern_a=min(gross_ern_a,20000);

		/* Salaried flag: 1 for rate type S, 0 for H or D, missing otherwise */
			if type="S" then salaried=1;
			else if type="H" or type="D" then salaried=0;

		/* Base pay: rescales std_pay by pay frequency (salaried) or uses
		   40 times std_pay (rate type H); missing for any other combination */
			base_pay=.;
			if type='S' then do;
				if pay_freq in ('2','B') then base_pay=round(std_pay/2,0.01);
				else if pay_freq in ('4','5','W') then base_pay=round(std_pay,0.01);
				else if pay_freq='M' then base_pay=round(std_pay*12/52,0.01);
				else if pay_freq='S' then base_pay=round(std_pay*24/52,0.01);
			end;
			else if type='H' then base_pay=round(std_pay*40,0.01);

		/* Define OT using the ratio of ern2_a to hours times the pay rate.
		   ern2_a counts as overtime only when the ratio, rounded to 0.05,
		   lies in [0,2] in absolute value. The denominator is checked first so
		   a zero no longer triggers division-by-zero errors; the ratio then
		   stays missing and ot_a stays 0, exactly as before. */
			ot_a=0;
			if type="S" then ot_denom=hrs2_q*base_pay/35;
			else ot_denom=hrs2_q*std_pay;
			if ot_denom ne 0 then imputed_wage=round(ern2_a/ot_denom,0.05);
			if 0<=abs(imputed_wage)<=2 then ot_a=ern2_a;

		/* Definition 1: normalize by actual number of paychecks.
		   NOTE(review): reg_a is never assigned in this step (ern1_a is read
		   but not used), so reg is always missing - confirm whether
		   reg_a=ern1_a was intended. */
			%norm_by_weeks(in=gross_ern_a,out=gross);
			%norm_by_weeks(in=ot_a,out=ot);
			%norm_by_weeks(in=reg_a,out=reg);
			%norm_by_weeks(in=tot_hrs_q,out=tot_hrs);
			%norm_by_weeks(in=hrs2_q,out=ot_hrs);

		/* Definition 2: impute number of paychecks from day of last paycheck */
			days=last_prc_pay_d-yr_month;
			weeks=int(days/7);
			/* a last pay date before yr_month gives no usable week count */
			if weeks~=. and days<0 then weeks=.;
			if pay_freq in ('2','B') then imputed_pays=int(weeks/2)+2;
			else if pay_freq in ('4','5','W') then imputed_pays=weeks+2;
			else if pay_freq='M' then imputed_pays=1;
			else if pay_freq='S' then imputed_pays=2;

			if pay_freq in ('2','B') then gross2=gross_ern_a/(imputed_pays*2);
			else if pay_freq in ('4','5','W') then gross2=gross_ern_a/(imputed_pays);
			else if pay_freq='M' then gross2=(gross_ern_a/imputed_pays)*3/13;
			else if pay_freq='S' then gross2=(gross_ern_a/imputed_pays)*6/13;

		/* Round numbers */
			gross=round(gross);
			ot=round(ot);
			reg=round(reg);
			tot_hrs=round(tot_hrs,0.1);
			ot_hrs=round(ot_hrs,0.1);
	run;

	/* Keep a balanced panel of firms */
		%hashmerge(data=balanced_workers&y.&m.,
		one=balanced_firms,	many=workers&y.&m.,
		by=("client_code"),type=inner);

	/* Sort for merge; nodupkey keeps one row per client_code emp_pur_c */
		proc sort data=balanced_workers&y.&m. nodupkey;
			by client_code emp_pur_c;
		run;

	/* Recode the client_codes to save space */
		data balanced_workers&y.&m.;
			merge balanced_workers&y.&m.(in=a) output.client_crosswalk(in=b);
			by client_code;
			if a;
			length client_id 4;
		run;

%mend;

/*
 * clean_base: build the base-period worker list from input.ap_20<y><m>.
 *   y= two-digit year suffix, m= two-digit month.
 * Output base<y><m> holds rows with state_bin2=1, restricted to
 * balanced_firms, one row per client_code emp_pur_c, merged with
 * output.client_crosswalk by client_code. It carries the base-period
 * values salaried0 and base0 and the constant flag cont=1.
 */
%macro clean_base(y=,m=);
data base&y.&m.(where=(state_bin2=1)
			keep=yr_month state_bin state_bin2 client_code emp_pur_c cont salaried0 base0);
		set input.ap_20&y&m(where=(emp_stat_c~="T") 
			rename=(worked_in_st_c=state_emp addr_st_c=state std_rt_type_c=type std_pay_rt_a=std_pay pay_freq_c=pay_freq)
			keep=emp_pur_c yr_month addr_st_c emp_stat_c client_code 
			std_rt_type_c std_pay_rt_a pay_freq_c worked_in_st_c);

		/* Base salary: rescales std_pay by pay frequency for rate type S,
		   uses 40 times std_pay for rate type H, missing otherwise */
			base0=.;
			if type='S' then do;
				if pay_freq in ('2','B') then base0=round(std_pay/2,0.01);
				else if pay_freq in ('4','5','W') then base0=round(std_pay,0.01);
				else if pay_freq='M' then base0=round(std_pay*12/52,0.01);
				else if pay_freq='S' then base0=round(std_pay*24/52,0.01);
			end;
			else if type='H' then base0=round(std_pay*40,0.01);

		/* State bin from the address state; 1 = any state other than AK, CA, NY, ME */
			if state="AK" then state_bin=2;
			else if state="CA" then state_bin=3;
			else if state="NY" then state_bin=4;
			else if state="ME" then state_bin=5;
			else state_bin=1;

		/* Same bins from the state worked in */
			if state_emp="AK" then state_bin2=2;
			else if state_emp="CA" then state_bin2=3;
			else if state_emp="NY" then state_bin2=4;
			else if state_emp="ME" then state_bin2=5;
			else state_bin2=1;
			length state_bin2 3;

		/* Base-period salaried flag: 1 for S, 0 for H or D, missing otherwise */
			if type="S" then salaried0=1;
			else if type="H" or type="D" then salaried0=0;

		/* Constant marker carried by every base row */
			cont=1;
			length cont 3;
run;

	/* Restrict to the balanced panel of firms (overwrites base<y><m>) */
		%hashmerge(data=base&y.&m.,
		one=balanced_firms,	many=base&y.&m.,
		by=("client_code"),type=inner);

	/* Order for the later merge; nodupkey keeps one row per key */
		proc sort data=base&y.&m. nodupkey;
			by client_code emp_pur_c;
		run;

	/* Attach the crosswalk columns (client_id) to save space downstream */
		data base&y.&m.;
			merge base&y.&m.(in=a) output.client_crosswalk(in=b);
			by client_code;
			if a;
			length client_id 4;
		run;

%mend;

/*
 * impute_week: DATA-step fragment used by normalize.
 *   in=  monthly total
 *   out= variable that receives the weekly value (0 when it cannot be computed)
 * Uses num_pay_mean, the paycheck count merged in from output.num_paychecks.
 * Pay-frequency codes are grouped as in clean2: '2' with 'B', and '4' and
 * '5' with 'W'. Previously '2', '4' and '5' fell through and were left at 0.
 * A zero num_pay_mean leaves the result at 0, which is the value the old
 * code reached via a division-by-zero error followed by the missing-to-0 reset.
 */
%macro impute_week(in=,out=);
	&out.=0;
	if pay_freq in ('M','S') then &out.=round(&in.*12/52,0.01);
	else if num_pay_mean ne 0 then do;
		if pay_freq in ('2','B') then &out.=round(&in./(2*num_pay_mean),0.01);
		else if pay_freq in ('4','5','W') then &out.=round(&in./num_pay_mean,0.01);
	end;
	if &out.=. then &out.=0;
%mend impute_week;

/*
 * normalize: attach the paycheck count and derive weekly measures.
 *   y= two-digit year suffix, m= two-digit month.
 * Reads balanced_workers<y><m>, inner-joins output.num_paychecks on
 * yr_month, client_code and pay_freq through the external hashmerge macro,
 * and writes collapsed_workers<y><m> with base_pay renamed to base and the
 * weekly variables gross3, ot3, reg3, tot_hrs3 and ot_hrs3 added.
 */
%macro normalize(y=,m=);
	/* Merge in the number of paychecks */
		%hashmerge(data=collapsed_workers&y.&m.,
		one=output.num_paychecks(keep=yr_month client_code pay_freq num_pay_mean), 
		many=balanced_workers&y.&m.,
		by=("yr_month","client_code","pay_freq"),type=inner, gen=);
	
	/* Normalize monthly earnings */
		data collapsed_workers&y.&m.;
			set collapsed_workers&y.&m.(rename=(base_pay=base));
			%impute_week(in=gross_ern_a,out=gross3);
			%impute_week(in=ot_a,out=ot3);
			%impute_week(in=reg_a,out=reg3);
			%impute_week(in=tot_hrs_q,out=tot_hrs3);
			%impute_week(in=hrs2_q,out=ot_hrs3);

			/* Round numbers */
			base0=round(base0);
			base=round(base);
			gross=round(gross);
			gross2=round(gross2);
			gross3=round(gross3);
			gross_ern_a=round(gross_ern_a);
			ot3=round(ot3);
			reg3=round(reg3);
			ot_hrs3=round(ot_hrs3,0.1);
			tot_hrs3=round(tot_hrs3,0.1);

			/* Set lengths. Only whole-number variables are shortened:
			   tot_hrs3 and ot_hrs3 keep tenths and would lose precision
			   if stored in fewer than 8 bytes. */
			length state_bin 3;
			length yr_month base gross3 ot3 reg3 4;
		run;
%mend;	

/*
 * loop_clean: build the stayers panel month by month.
 *   y=, m=       reference year suffix and month; loop offset i processes
 *                the calendar month that is i+1 months after y/m
 *                (y=16, m=12, i=0 gives 2017/01)
 *   start=, end= first and last loop offset (inclusive)
 *   base_y=, base_m=  month that defines the incumbents; defaults to 16/12,
 *                the value that was previously hard-coded
 * For every month: clean the data (clean2), keep only workers that are also
 * in the base month, normalize, and append to wr.stayers_panel_2months.
 *
 * The base data set is deleted once, after the loop. It used to be deleted
 * inside the loop, so any run with more than one iteration lost it before
 * the second merge.
 */
%macro loop_clean(y=,m=,start=,end=,base_y=16,base_m=12);
	/* keep loop bookkeeping out of the caller's macro scope */
	%local i a b c _timer_start;

	/* Define incumbents as employed in the base month */
	%clean_base(y=&base_y.,m=&base_m.);

	/* Loop over all months, keeping only incumbents */
	%do i=&start. %to &end.;
		%let _timer_start=%sysfunc(datetime());
		/* a: zero-based month count from January of year y;
		   b: year suffix, zero-padded to match the ap_20yymm naming;
		   c: two-digit month */
		%let a=%eval(&m+&i);
		%let b=%sysfunc(putn(%eval(&y+%sysevalf(&a./12,floor)),z2.));
		%let c=%sysfunc(putn(%sysevalf(1+&a.-12*%sysevalf(&a./12,floor)),z2.));

		/* Clean month of data */
		%clean2(y=&b.,m=&c.);

		/* Merge and keep only incumbents. salaried0 and base0 come from the
		   base month; yr_month and cont from the base are dropped here. */
		data balanced_workers&b.&c.;
			merge base&base_y.&base_m.(in=in_base drop=yr_month cont)
				balanced_workers&b.&c.(in=in_month);
			by client_code emp_pur_c;
			if in_base=1 and in_month=1;
		run;
		/* Can easily adjust to keep, say, naics or gender */

		/* Collapse over all incumbents */
		%normalize(y=&b.,m=&c.);

		/* Append to panel; force drops variables the panel does not define */
		proc append base=wr.stayers_panel_2months
			data=collapsed_workers&b.&c. 
		force; run;

		/* Delete this month's temporary files (the base is kept for the next pass) */
		proc datasets nolist;
			delete workers&b.&c. balanced_workers&b.&c. 
			collapsed_workers&b.&c.;
		run;
		quit;

		data _null_;
			dur=datetime()-&_timer_start;
			put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
		run;
	%end;

	/* Base month is no longer needed once every month has been merged */
	proc datasets nolist;
		delete base&base_y.&base_m.;
	run;
	quit;
%mend;

*Create the empty panel that the monthly results are appended to;
	/* Zero-row shell: variables are declared in the order and with the
	   storage lengths the panel should have; STOP ends the step before any
	   observation is written. */
	data wr.stayers_panel_2months;
		length yr_month 4 salaried0 3 base0 base 4 gross_ern_a 5
			gross gross3 ot ot3 4 salaried cont payroll_q 3
			emp_hir_dat 8 client_id 4
			emp_pur_c $ 64 state $ 2 pay_freq $ 1;
		format yr_month emp_hir_dat mmddyy10.;
		stop;
	run;

*Fill the panel;
	%loop_clean(y=16,m=12,start=0,end=0);

	/* Final ordering: firm, worker, month */
	proc sort data=wr.stayers_panel_2months;
		by client_id emp_pur_c yr_month;
	run;	
